/* 
*	Watcom C++ source (original note lost to an encoding error), dated 1999-4-14
*
*/

#include "gl.h"
#include "bitmap.hpp"

// Default constructor: forwards the fixed argument 16 to the Bitmap16 base
// constructor (presumably the bits-per-pixel depth -- confirm against Bitmap16).
Bitmap16MMX::Bitmap16MMX()
	: Bitmap16( 16 )
{
	// Nothing else to set up here.
}

// Copies a w-by-h rectangle of 16-bit pixels from (sx, sy) to (x, y) within
// this bitmap's own rows (Bitmap16::line).
//
// The copy runs backwards: rows are visited from the last row of the
// rectangle up to the first, and every row is copied from its last pixel
// down to its first with the direction flag set.
//
// x, y   - destination top-left corner, in pixels / rows
// sx, sy - source top-left corner, in pixels / rows
// w, h   - rectangle size in pixels / rows; nothing is copied when either
//          is <= 0
//
// No clipping is performed here. Line-table entries are read as 4-byte
// pointers and pixels as 2-byte values.
void Bitmap16MMX::BlitInside( int x, int y, int sx, int sy, int w, int h )
{
	char** line = Bitmap16::line;

	// An empty rectangle would make the row counter wrap around below.
	if( w <= 0 || h <= 0 )
		return;

	__asm{
		/* string instructions store through ES:EDI, so make ES equal DS */
		push es;
		mov ax, ds;
		mov es, ax;
		/* copy from the bottom to the top */;
		/* sy and y become the index of the LAST row: sy+h-1 and y+h-1 */
		mov eax, sy;
		mov ebx, h;
		dec ebx;
		add eax, ebx;
		mov sy, eax;
		add y, ebx;
		inc ebx;
		
		/* sx and x become the index of the LAST pixel: sx+w-1 and x+w-1 */
		mov ecx, w;
		mov edx, sx;
		dec ecx;
		add edx, ecx;
		mov sx, edx;
		add x, ecx;
		/* set the direction flag: movs now walks towards lower addresses */
		std;
		inc ecx;
		/* pixel indices -> byte offsets (2 bytes per pixel) */
		shl sx, 1;
		shl x, 1;
		/* row indices -> byte offsets into the line table (4 bytes per entry) */
		mov edx, y;
		shl eax, 2;
		shl edx, 2;
		
		cmp ecx, 1;
		jg blitinside16_dw;
		
	blitinside16_w: 	/* the width is one word */;
		mov esi, line;
		mov edi, esi;
		mov esi, dword ptr [esi+eax];
		add esi, sx;
		mov edi, dword ptr [edi+edx];
		add edi, x;
		mov ecx, w;
		repnz movsw;
		/* step both line-table offsets to the previous row */
		sub eax, 4;
		sub edx, 4;
		dec ebx;
		jnz blitinside16_w;
		jmp blitinside16_end;
		
	blitinside16_dw:	/* the width is larger than 1 */;
		mov ecx, w;
		mov esi, line;
		mov edi, esi;
		mov esi, dword ptr [esi+eax];
		add esi, sx;
		mov edi, dword ptr [edi+edx];
		add edi, x;
		/* ecx = number of pixel pairs; carry set when the width is odd */
		shr ecx, 1;
		jnc blitinside16_dwdw;
		/* odd width: copy the single trailing pixel first */
		movsw;
	blitinside16_dwdw:;
		/* back up 2 bytes so the dword moves cover the last two pixels */
		sub esi, 2;
		sub edi, 2;
		repnz movsd;
		sub eax, 4;
		sub edx, 4;
		dec ebx;
		jnz blitinside16_dw;
		
	blitinside16_end:;
		/* restore the direction flag: compiled code assumes it is clear */
		cld;
		pop es;
	}

}

// Blit from this bitmap to another bitmap.
// Always copies forward (no reversed copy); rows of 4 or more pixels are
// moved 8 bytes at a time through MMX register mm0.
//
// dest   - bitmap that receives the pixels
// x, y   - destination top-left corner, in pixels / rows
// sx, sy - source top-left corner, in pixels / rows
// w, h   - rectangle size in pixels / rows
//
// Only the first row's addresses are taken from the line tables; later rows
// are reached by adding (pitch - 2*w) to the running pointers, so pitch is
// treated as a byte count. No clipping or argument checks are done here.
// NOTE(review): h <= 0 would make the row counter wrap, and the narrow path
// runs rep with ecx = w, so a negative w is an effectively unbounded copy;
// presumably callers pass a clipped rectangle with w >= 0 and h >= 1 --
// confirm.
void Bitmap16MMX::BlitOutside( Bitmap* dest, int x, int y, int sx, int sy, int w, int h )
{
	char** dline = dest->line;
	char** sline = line;
	int dpitch = dest->pitch;
	int spitch = pitch;

	__asm{
		/* string instructions store through ES:EDI, so make ES equal DS */
		push es;
		mov ax, ds;
		mov es, ax;
		
		mov ebx, h;
		cld;
		mov ecx, w;
		/* row indices -> byte offsets into the line tables (4 bytes per entry) */
		mov eax, y;
		mov edx, sy;
		shl eax, 2;
		shl edx, 2;
		/* pixel indices -> byte offsets (2 bytes per pixel) */
		shl sx, 1;
		shl x, 1;
		mov esi, sline;
		mov esi, [esi+edx];
		add esi, sx;
		mov edi, dline;
		mov edi, [edi+eax];
		add edi, x;
		/* edx / eax = bytes to skip after each row in source / destination */
		shl ecx, 1;
		mov edx, spitch;
		mov eax, dpitch;
		sub edx, ecx;
		sub eax, ecx;
		shr ecx, 1;
		
		cmp ecx, 4;
		jl width_less_than_4;
		
	blitoutside16_dwloop: /* width is at least 4 */;
		/* bit 0 of the width set: copy one pixel */
		shr ecx, 1;
		jnc blitoutside16_dwl;
		movsw;
	blitoutside16_dwl:;
		/* bit 1 of the width set: copy two pixels */
		shr ecx, 1;
		jnc blitoutside16_qw;
		movsd;
	blitoutside16_qw:;
		/* ecx = w / 4: copy the rest four pixels (8 bytes) at a time */
		movq mm0, [esi];
		add esi, 8;
		movq [edi], mm0;
		add edi, 8;
		loop blitoutside16_qw;
		mov ecx, w;
		add esi, edx;
		add edi, eax;
		dec ebx;
		jnz blitoutside16_dwloop;
		/* leave MMX state before returning */
		emms;
		jmp blitoutside16_end;
		
	width_less_than_4:;
		/* narrow rows: plain word copy, one pixel per iteration */
		repnz movsw;
		add esi, edx;
		add edi, eax;
		mov ecx, w;
		dec ebx;
		jnz width_less_than_4;
		
	blitoutside16_end:;
		pop es;
	}
}

// Color-keyed blit: copies a w-by-h rectangle of 16-bit pixels from
// (sx, sy) in this bitmap to (x, y) in dest, leaving the destination
// untouched wherever the source pixel equals the low 16 bits of colorKey.
//
// dest   - bitmap that receives the pixels
// x, y   - destination top-left corner, in pixels / rows
// sx, sy - source top-left corner, in pixels / rows
// w, h   - rectangle size in pixels / rows; nothing is copied when either
//          is <= 0
//
// Row addresses come from the two bitmaps' line tables. No clipping is done
// here -- presumably the caller passes an already clipped rectangle (confirm).
//
// Written in plain C++: the previous inline-assembly body was unfinished
// (destination x offset never applied, 32-bit accesses on 16-bit pixels,
// no advance to the next row), so there is no MMX path yet.
void Bitmap16MMX::BlitMasked16( Bitmap* dest, int x, int y, int sx, int sy, int w, int h )
{
	char** dline = dest->line;
	char** sline = line;
	const unsigned short key = (unsigned short)colorKey;
	int row, col;

	if( w <= 0 || h <= 0 )
		return;

	for( row = 0; row < h; row++ ){
		// Start of this row's span in the source and in the destination.
		const unsigned short* src = (const unsigned short*)sline[sy + row] + sx;
		unsigned short* dst = (unsigned short*)dline[y + row] + x;

		for( col = 0; col < w; col++ ){
			// Pixels matching the color key are transparent: skip them.
			if( src[col] != key )
				dst[col] = src[col];
		}
	}
}

void Bitmap16MMX::Clear( int l, int t, int w, int h, int color )
{
	char **line = Bitmap16::line;
	int pitch = Bitmap::pitch;
	
	color = (color & pixelInfo->colorRedMask) >> pixelInfo->colorRedPos //red
		| (color & pixelInfo->colorGreenMask) >> 5	// green	
		| (color & pixelInfo->colorBlueMask) >> 3;	//blue
	
	if( clip ){
		if( l < cl ){
			w -= cl - l;
			l = cl;
		}
		if( t < ct ){
			h -= ct - t;
			t = ct;
		}
		if( l + w > cr )
			w = cr - l;
		if( t + h > cb )
			h = cb - t;
	}
	else{
		if( l < 0 ){
			w += l;
			l = 0;
		}
		if( t < 0 ){
			h += t;
			t = 0;
		}
		if( l + w > width )
			w = width - l;
		if( t + h > height )
			h = height - t;
	}
	
	__asm{
		push es;
		mov ax, ds;
		mov es, ax;
		
		mov eax, color;
		mov ebx, eax;
		shl eax, 16;
		mov edx, t;
		mov ax, bx;
		mov ebx, h;
		shl edx, 2;
		cld;
		mov esi, l;
		shl esi, 1;
		mov ecx, w;
		mov edi, line;
		mov edi, [edi+edx];
		add edi, esi;
		mov edx, pitch;
		shl ecx, 1 
		sub edx, ecx;
		shr ecx, 2;
	/*	jnc clear16_dw;
	*/	
		clear16_end:;
		pop es;
	}
}

// blend this bitmap( alpha ) and src( 255-alpha ) bitmap to dest, 
// alpha 0-255, optimized for MMX
//
// What the code below actually does: after BLITCLIP() (a macro defined
// elsewhere -- presumably it clips the rectangle; confirm), it walks the
// rectangle row by row through the two line tables. For each pixel it loads
// the source pixel into ax and the destination pixel into dx, calls through
// the pointer stored at [ebx], and writes ax back to the destination.
//
// NOTE(review): ebx is never loaded in this block, so the indirect call
// target is whatever the register happens to hold. alpha, method and the
// local colorkey are not referenced by the assembly, and no MMX instruction
// is used. This looks unfinished -- confirm the intended blend-routine
// dispatch before relying on this function.
void Bitmap16MMX::BlitBlend( Bitmap* dest, int x, int y,
		int sx, int sy, int w, int h, int alpha, int method )
{
	char **dline = dest->line;
	char **sline = line;
	int colorkey = colorKey;
	
	BLITCLIP();
	
	__asm{
		/* pixel indices -> byte offsets (2 bytes per pixel) */
		shl x, 1;
		shl sx, 1;
		cld;
		/* row indices -> byte offsets into the line tables (4 bytes per entry) */
		shl sy, 2;
		shl y, 2;
		
	blit_line:;
		/* esi = first source pixel of this row, edi = first destination pixel */
		mov esi, sline;
		add esi, sy;
		mov esi, [esi];
		add esi, sx;
		mov edi, dline;
		add edi, y;
		mov edi, [edi];
		add edi, x;
		mov ecx, w;

	blitloop:;
		/* ax = source pixel (esi advances by 2), dx = destination pixel */
		lodsw;
		mov dx, [edi];
		/* NOTE(review): ebx has not been set anywhere above */
		call [ebx];
		mov [edi], ax;
		add edi, 2;
		loop blitloop;
		
		/* move both line-table offsets to the next row */
		add y, 4;
		add sy, 4;
		dec h;
		jnz blit_line;
	}
}

